"""Misc utility functions for kcidb."""
import hashlib
import re

from cki_lib.kcidb import CONSUMER_KCIDB_SCHEMA
from cki_lib.session import get_session

# Module-wide session obtained from cki_lib; patch_list_hash() uses it to fetch patches.
SESSION = get_session(__name__)

# Statuses that unknown_issues() considers as requiring triage.
TRIAGEABLE_STATUS = {"ERROR", "FAIL"}
# Status names, in the order they are listed in the consumer KCIDB schema's "status" enum.
ORDERED_STATUS = CONSUMER_KCIDB_SCHEMA.json["$defs"]["status"]["enum"]
# Maps each status name to its position in ORDERED_STATUS, so statuses can be compared;
# unknown_issues() treats the status with the lowest position as the worst one.
ORDERED_STATUS_MAP = {status: i for i, status in enumerate(ORDERED_STATUS)}


def patch_list_hash(patch_list):
    """Compute a single SHA-256 digest identifying a list of patches.

    Every patch is downloaded through the module session, hashed on its own,
    and the per-patch hex digests (one per line, in input order) are hashed
    again to produce the final value.

    Args:
        patch_list: Iterable of patch URLs. May be empty or None.

    Returns:
        The hex digest as a string, or '' when `patch_list` is falsy.
    """
    if not patch_list:
        return ''

    # A trailing /mbox/ is swapped for /raw/ so that only the patch diff is hashed:
    # Patchwork mbox output carries headers that can change once people reply to the patch.
    digests = [
        hashlib.sha256(
            SESSION.get(re.sub(r'/mbox/?$', '/raw/', url)).content
        ).hexdigest()
        for url in patch_list
    ]

    # One digest per line, newline-terminated, then hashed as UTF-8 text.
    combined = '\n'.join(digests) + '\n'
    return hashlib.sha256(combined.encode('utf8')).hexdigest()


def unknown_issues(dw_all, tests=None):  # noqa: PLR0914
    # pylint: disable=too-many-locals
    """Get the list of tests that need to be reported.

    A test is reported when at least one of the following holds:

    * it is not waived, has results, and one of its results has a triageable
      status without a non-regression issue occurrence attached;
    * it is not waived, has no results, has a triageable status itself and
      has no non-regression issue occurrence attached;
    * it, or one of its results, has an issue occurrence flagged as regression.

    Issue occurrences are read from the whole `dw_all` response; occurrences
    that point to tests or results outside of `tests` are ignored.

    Side effects on the test objects in `tests`:

    * every matching result from `dw_all.testresults` is appended to
      `test.misc["results"]`;
    * `test.status` is overwritten for tests with problematic results, using
      the problematic result status with the lowest ORDERED_STATUS position.

    Args:
        dw_all: Instance of the response from the "checkout-all" endpoint.
        tests: Optional list of tests to filter. Defaults to `dw_all.tests`.

    Returns:
        List of tests missing triage or with regression, in no particular order.
    """
    if tests is None:
        tests = dw_all.tests

    test_lookup = {t.id: t for t in tests}
    testresult_lookup = {r.id: r for r in dw_all.testresults if r.test_id in test_lookup}

    # Group results into the tests
    for tr in testresult_lookup.values():
        test_lookup[tr.test_id].misc.setdefault("results", []).append(tr)

    # Gather triageable tests and results IDs into two sets
    triageable_test_ids = set()
    triageable_testresults_ids = set()
    for test in tests:
        if test.waived:
            continue
        if results := test.misc.get("results", []):
            # When a test has results, only the results decide; the test status is ignored
            triageable_testresults_ids.update(
                r.id for r in results if r.status in TRIAGEABLE_STATUS
            )
        elif test.status in TRIAGEABLE_STATUS:
            triageable_test_ids.add(test.id)

    # Separate regressions from triaged known issues
    regression_occurs = [i for i in dw_all.issueoccurrences if i.is_regression]
    triaging_occurs = [i for i in dw_all.issueoccurrences if not i.is_regression]

    # Compute triaged results and tests
    triaged_testresults_ids = {i.testresult_id for i in triaging_occurs if i.testresult_id}
    triaged_test_ids = {i.test_id for i in triaging_occurs if i.test_id}

    # Compute untriaged results and tests
    untriaged_testresults_ids = triageable_testresults_ids - triaged_testresults_ids
    untriaged_test_ids = triageable_test_ids - triaged_test_ids

    # Compute regressed results and tests.
    # Occurrences cover the whole checkout, while the lookups only hold the requested
    # tests: keep only the IDs we can resolve, otherwise the lookups below raise KeyError
    # whenever `tests` is a subset of `dw_all.tests`.
    regressed_testresults_ids = {
        i.testresult_id
        for i in regression_occurs
        if i.testresult_id and i.testresult_id in testresult_lookup
    }
    regressed_test_ids = {
        i.test_id for i in regression_occurs if i.test_id and i.test_id in test_lookup
    }

    # Account for tests with untriaged results or regressing results
    problematic_tr_ids = untriaged_testresults_ids | regressed_testresults_ids
    problematic_tr_test_ids = {testresult_lookup[r_id].test_id for r_id in problematic_tr_ids}
    # Overwrite test.status based on its worst problematic testresult
    for test_id in problematic_tr_test_ids:
        test = test_lookup[test_id]
        test.status = min(
            (tr.status for tr in test.misc["results"] if tr.id in problematic_tr_ids),
            key=ORDERED_STATUS_MAP.get,
        )

    problematic_test_ids = (
        problematic_tr_test_ids  # Tests with results either missing triage or with regression
        | untriaged_test_ids  # Tests missing triage
        | regressed_test_ids  # Tests triaged with regression
    )
    return [test_lookup[t] for t in problematic_test_ids]
